rm(list=ls())
library(haven)
library(tidyverse)
library(labelled)
library(ggpubr)
library(MatchIt)
library(AER)
library(MASS)
library(sensemakr)

##### LOAD ASIAN BAROMETER DATA #####
# Set working directory to where datasets are stored
AB4 <- read_dta("ABS_w4.dta")

##### RECODE VARIABLES FOR ANALYSIS ##### 
###### Bilingual ###### 
AB4$ir10_original <- ifelse(AB4$ir10 == -1, NA, AB4$ir10)
AB4$Bilingual <- ifelse(AB4$ir10 %in% c(2:3), 1, ifelse(AB4$ir10 == 1, 0, NA))
AB4 <- filter(AB4, !(country %in% c(1:3,5))) # ir10 not asked in Japan, HK, Korea, and Mongolia

###### Bilingual_alt ###### 
AB4$se11_original <- ifelse(AB4$se11 == 1, "Only local",
                            ifelse(AB4$se11 %in% c(2,3,4), "Mixture",
                                   ifelse(AB4$se11 == 5, "Only official", NA)))
AB4$Bilingual_alt <- ifelse(AB4$se11 %in% c(2,3,4), 1, ifelse(AB4$se11 %in% c(1,5), 0, NA))

###### EthnicSense ###### 
AB4$EthnicSense <- ifelse(AB4$q105 %in% c(1:4), 5-AB4$q105, NA)

###### EthnicDominant ###### 
AB4$EthnicDominant <- ifelse(AB4$se11a %in% c(401, # Han in China
                                              612, # Tagalog in Philippines
                                              615, # Waray in Philippines
                                              616, # Aklanon in Philippines
                                              628, # Hiligaynon in Philippines
                                              606, # Cebuano in Philippines
                                              604, # Ilocano in Philippines
                                              636, # Pangasinense in Philippines
                                              601, # Bicol in Philippines
                                              702, # Min-nan (or Taiwanese) in Taiwan
                                              801, # Thai in Thailand,
                                              901, # Java in Indonesia
                                              1001, # Chinese in Singapore
                                              1101, # Kinh in Vietnam
                                              1201, # Khmer in Cambodia
                                              1301, # Malay in Malaysia
                                              14086), # Bamar in Myanmar
                             1, ifelse(AB4$se11a %in% c(-1,98,99), NA, 0))

###### Female ###### 
AB4$Female <- ifelse(AB4$se2 == 2, 1, ifelse(AB4$se2 == 1, 0, NA))

###### Age ###### 
AB4$Age <- ifelse(AB4$se3_2 %in% c(-1), NA, AB4$se3_2)

###### Education ###### 
AB4$Education <- ifelse(AB4$se5 %in% c(1:10),
                        dplyr::recode(AB4$se5,
                                      `1` = 1,
                                      `2` = 2,
                                      `3` = 3,
                                      `4` = 4,
                                      `6` = 4,
                                      `5` = 5,
                                      `7` = 5,
                                      `8` = 6,
                                      `9` = 7,
                                      `10` = 8),
                        NA)

###### Religion ###### 
AB4$Religion <- ifelse(AB4$se6 == 90, 0, ifelse(AB4$se6 %in% c(98,99,-1),NA,1))

###### Income ###### 
AB4$Income <- ifelse(AB4$se14 %in% c(1:5), AB4$se14, NA)

###### SocialStatus ###### 
AB4$SocialStatus <- ifelse(AB4$se12 %in% c(1:10), AB4$se12, NA)

###### Employed ###### 
AB4$Employed <- ifelse(AB4$se9 == 1, 1, ifelse(AB4$se9 == 2, 0, NA))

###### Internet ###### 
AB4$Internet <- ifelse(AB4$q47 %in% c(1:2), 2-AB4$q47, NA)

###### Urban ###### 
AB4$Urban <- ifelse(AB4$ir13 == 4, 0, ifelse(AB4$ir13 %in% c(1:3),1, NA))

###### DemBest ###### 
AB4$DemBest <- ifelse(AB4$q129 %in% c(1:4), AB4$q129, NA)

###### Trust ###### 
AB4$Trust <- ifelse(AB4$q24 %in% c(1:4), AB4$q24, NA)

###### NatPride ###### 
AB4$NatPride <- ifelse(AB4$q161 %in% c(1:4), AB4$q161, NA)

##### ETHNOLINGUISTIC DIVERSITY IN ABS ##### 
###### Prop. of Dominant Ethnic Group(s) by Country (Table OA1a.1) ###### 
prop.table(table(AB4$country, AB4$EthnicDominant), margin = 1)

###### Prop. of Bilingual Respondents (Figure OA1a.1) ###### 
language.diversity <- AB4 %>%
  group_by(country, EthnicDominant) %>%
  filter(!is.na(EthnicDominant)) %>%
  summarize(n = n(),
            Bilingual = mean(Bilingual, na.rm = T),
            Bilingual_alt = mean(Bilingual_alt, na.rm = T)) %>%
  mutate(country = to_factor(country) %>% as.character(),
         EthnicDominant = factor(ifelse(EthnicDominant == 1, "Dominant", "Non-dominant"),
                                 levels = c("Dominant", "Non-dominant")))

language.diversity.Bilingual.p <- ggplot(language.diversity, 
                                         aes(x = Bilingual, y = country, fill = EthnicDominant)) +
  geom_bar(stat = "identity", position = position_dodge(width = 0.8), width = 0.8) +
  geom_text(aes(label = sprintf("n = %d", n)), 
            position = position_dodge(width = 0.9), 
            hjust = -0.2, 
            size = 3) +
  scale_y_discrete(name = "") +
  scale_x_continuous(name = "Bilingual: Proportion Answered in Non-Native Language/Dialect", 
                     limits = c(0,1.2),
                     breaks = seq(0, 1, 0.25),
                     labels = seq(0, 1, 0.25)) +
  scale_fill_discrete(name = "Ethnic Group") +
  theme_bw() +
  theme(legend.position = "bottom")

language.diversity.Bilingual_alt.p <- ggplot(filter(language.diversity, !is.nan(Bilingual_alt)), 
                                             aes(x = Bilingual_alt, y = country, fill = EthnicDominant)) +
  geom_bar(stat = "identity", position = position_dodge(width = 0.8), width = 0.8) +
  geom_text(aes(label = sprintf("n = %d", n)), 
            position = position_dodge(width = 0.9), 
            hjust = -0.2, 
            size = 3) +
  scale_y_discrete(name = "") +
  scale_x_continuous(name = "Bilingual (Alternate): Proportion Speaking Mostly\nLocal/Official Language or Mixture at Home",
                     limits = c(0,1.2),
                     breaks = seq(0, 1, 0.25),
                     labels = seq(0, 1, 0.25)) +
  scale_fill_discrete(name = "Ethnic Group") +
  theme_bw() +
  theme(legend.position = "bottom")

ggarrange(language.diversity.Bilingual.p, language.diversity.Bilingual_alt.p,
          nrow = 2, common.legend = T)

###### Type of Language(s) Spoken at Home (Figure OA1a.2) ###### 
language.diversity_se11 <- AB4 %>%
  group_by(country, EthnicDominant, se11_original) %>%
  filter(!is.na(EthnicDominant) & !is.na(se11_original)) %>%
  summarize(n = n()) %>%
  mutate(country = to_factor(country) %>% as.character(),
         EthnicDominant = factor(ifelse(EthnicDominant == 1, "Dominant", "Non-dominant"),
                                 levels = c("Dominant", "Non-dominant"))) %>%
  group_by(country, EthnicDominant) %>%
  mutate(prop = n/sum(n))

language.diversity_se11.p <- ggplot(language.diversity_se11, aes(x = prop, y = country, fill = se11_original)) +
  geom_bar(stat = "identity", position = position_dodge(width = 0.8), width = 0.8) +
  facet_wrap(~EthnicDominant) +
  scale_y_discrete(name = "") +
  scale_x_continuous(name = "Proportion of Respondents") +
  scale_fill_discrete(name = "Language Spoken At Home") +
  theme_bw() +
  theme(legend.position = "bottom")

###### Covariate Determinants of Bilingualism (Table OA1a.2) ###### 
m1.Bilingual <- lm(Bilingual ~ 
                     EthnicDominant + Female + Age + 
                     Education + Income + Internet +
                     Employed + Religion + Urban +
                     factor(country),
                   data = AB4)
summary(m1.Bilingual)

m1.Bilingual_alt <- lm(Bilingual_alt ~ 
                         EthnicDominant + Female + Age + 
                         Education + Income + Internet +
                         Employed + Religion + Urban +
                         factor(country),
                       data = AB4)
summary(m1.Bilingual_alt)

##### DESCRIPTIVE STATISTICS (Table OA1B.4) ####
sum_stats <- AB4 %>%
  dplyr::select(Bilingual,Bilingual_alt,
                EthnicSense,Female,
                Age,SocialStatus,
                Education,Religion,Employed,
                Income,Internet,
                Urban,EthnicDominant,
                DemBest,Trust,
                NatPride) %>%
  psych::describe() %>%
  data.frame() # summary statistics
sum_stats <- sum_stats[,c(2:5, 8:9,11)]

##### CEM PROCESS ####
###### Difference in Means on Raw data and after CEM (Table OA1c.5) ###### 
AB4_clean <- AB4 %>%
  dplyr::select(EthnicSense,Bilingual,EthnicDominant,Female,Age,Education,
                Income,Employed,Religion,Internet,Urban,country) %>% na.omit()

match.EthnicSense <- matchit(Bilingual ~ 
                               EthnicDominant + Female + Age + 
                               Education + Income + Internet +
                               Employed + Religion + Urban +
                               factor(country),
                             data = AB4_clean,
                             method = "cem",estimand = 'ATE',
                             cutpoints = "sturges")

bal.tab <- cbind.data.frame(Raw_Mean_Bilingual = summary(match.EthnicSense)$sum.all[,"Means Treated"],
                            Raw_Mean_Monolingual = summary(match.EthnicSense)$sum.all[,"Means Control"],
                            Raw_diff = summary(match.EthnicSense)$sum.all[,"Means Treated"] - 
                              summary(match.EthnicSense)$sum.all[,"Means Control"],
                            CEM_Mean_Bilingual = summary(match.EthnicSense)$sum.matched[,"Means Treated"],
                            CEM_Mean_Monolingual = summary(match.EthnicSense)$sum.matched[,"Means Control"],
                            CEM_diff = summary(match.EthnicSense)$sum.matched[,"Means Treated"] - 
                              summary(match.EthnicSense)$sum.matched[,"Means Control"])

##### RESULTS ##### 
###### Baseline results (Table 1 and Table OA1d.6) ###### 
m1.EthnicSense <- lm(EthnicSense ~ Bilingual + 
                       EthnicDominant + Female + Age + 
                       Education + Income + Internet +
                       Employed + Religion + Urban +
                       factor(country),
                     data = AB4)
coeftest(m1.EthnicSense, vcov = vcovHC(m1.EthnicSense, type = "HC0"))

AB4$EthnicSense_cat <- factor(AB4$EthnicSense, levels = c(1:4), ordered = T)
m2.EthnicSense <- polr(EthnicSense_cat ~ Bilingual + 
                         EthnicDominant + Female + Age + 
                         Education + Income + Internet +
                         Employed + Religion + Urban +
                         factor(country),
                       data = AB4,
                       method = "logistic",
                       Hess = T)
summary(m2.EthnicSense)

m3.EthnicSense <- lm(EthnicSense ~ Bilingual,
                     data = match.data(match.EthnicSense),
                     weights = weights)
coeftest(m3.EthnicSense, vcov = vcovCL(m3.EthnicSense, cluster = ~subclass))

###### Predicted Probabilities from Ordinal Logit (Figure OA1d.3) ###### 
newdata_ologit <- data.frame(Bilingual = c(0,1), 
                             EthnicDominant = median(AB4$EthnicDominant, na.rm = T),
                             Female = median(AB4$Female, na.rm = T),
                             Age = mean(AB4$Age, na.rm = T),
                             Education = mean(AB4$Education, na.rm = T),
                             Income = mean(AB4$Income, na.rm = T),
                             Employed = median(AB4$Employed, na.rm = T),
                             Religion = median(AB4$Religion, na.rm = T),
                             Urban = median(AB4$Urban, na.rm = T),
                             Internet = median(AB4$Internet, na.rm = T),
                             country = factor(13))

iter = 1000
prob.out.Monolingual <- data.frame(SD = rep(NA, iter),D = rep(NA, iter),
                                   A = rep(NA, iter),SA = rep(NA, iter))
prob.out.Bilingual <- data.frame(SD = rep(NA, iter),D = rep(NA, iter),
                                 A = rep(NA, iter),SA = rep(NA, iter))

set.seed(1234)
for (i in 1:iter){ # this is going to take some time to run
  boot_data <- AB4[sample(nrow(AB4),nrow(AB4), replace = T),]
  m.EthnicSense <- polr(EthnicSense_cat ~ Bilingual + 
                          EthnicDominant + Female + Age + 
                          Education + Income + Internet +
                          Employed + Religion + Urban +
                          factor(country),
                        data = boot_data,
                        method = "logistic",
                        Hess = T)
  prob.out.Monolingual[i,] <- predict(m.EthnicSense, newdata = newdata_ologit, type = "probs",)[1,]
  prob.out.Bilingual[i,] <- predict(m.EthnicSense, newdata = newdata_ologit, type = "probs",)[2,]
  print(i)
}

Bilingual <- c(0,1)
Agreement <- c("SD", "D", "A", "SA")
pred.prob.data <- expand.grid(Bilingual = Bilingual,Agreement = Agreement,
                              Prob = NA,Prob.lb = NA,Prob.ub = NA)

pred.prob.data$Prob[pred.prob.data$Bilingual == 0] <- sapply(prob.out.Monolingual, FUN = mean)
pred.prob.data$Prob.lb[pred.prob.data$Bilingual == 0] <- sapply(prob.out.Monolingual, FUN = quantile, probs = 0.025)
pred.prob.data$Prob.ub[pred.prob.data$Bilingual == 0] <- sapply(prob.out.Monolingual, FUN = quantile, probs = 0.975)
pred.prob.data$Prob[pred.prob.data$Bilingual == 1] <- sapply(prob.out.Bilingual, FUN = mean)
pred.prob.data$Prob.lb[pred.prob.data$Bilingual == 1] <- sapply(prob.out.Bilingual, FUN = quantile, probs = 0.025)
pred.prob.data$Prob.ub[pred.prob.data$Bilingual == 1] <- sapply(prob.out.Bilingual, FUN = quantile, probs = 0.975)
pred.prob.data$Prob <- as.numeric(pred.prob.data$Prob)
pred_prob.p <- ggplot(pred.prob.data, 
                      aes(x = factor(Agreement), y = Prob,
                          group = factor(Bilingual))) +
  geom_point(aes(shape = factor(Bilingual)),
             position = position_dodge(width = 0.5)) +
  geom_errorbar(aes(x = factor(Agreement),
                    ymin = Prob.lb,
                    ymax = Prob.ub,
                    linetype = factor(Bilingual)), width = 0,
                position = position_dodge(width = 0.5)) +
  scale_y_continuous(name = "Predicted Probability") +
  scale_shape_discrete(name = "",
                       labels = c("Monolingual", "Bilingual")) +
  scale_linetype_discrete(name = "",
                          labels = c("Monolingual", "Bilingual")) +
  scale_x_discrete(name = "All citizens from different ethnic communities in\n your country are treated equally by the government",
                   labels = c("Strongly disagree",
                              "Disagree",
                              "Agree",
                              "Strongly agree")) +
  theme_bw() +
  theme(text=element_text(size=6))

##### CHANGE LEVEL OF COARSENING (Table OA1e.7) ##### 
###### Remove country origins from CEM model, "Sturges" ###### 
match.EthnicSense_noCntry <- matchit(Bilingual ~ 
                                       EthnicDominant + Female + Age + 
                                       Education + Income + Internet +
                                       Employed + Religion + Urban, 
                                     data = AB4_clean,
                                     method = "cem",estimand = 'ATE',
                                     cutpoints = "sturges")

m3_noCntry.EthnicSense <- lm(EthnicSense ~ Bilingual + factor(country),
                             data = match.data(match.EthnicSense_noCntry),
                             weights = weights)
coeftest(m3_noCntry.EthnicSense, vcov = vcovCL(m3_noCntry.EthnicSense, cluster = ~subclass))

###### Remove country origins + bin numeric into binary variables ###### 
match.EthnicSense_binary <- matchit(Bilingual ~ 
                                      EthnicDominant + Female + Age + 
                                      Education + Income + Internet +
                                      Employed + Religion + Urban, 
                                    data = AB4_clean,
                                    method = "cem",estimand = 'ATE',
                                    cutpoints=2)

m3_binary.EthnicSense <- lm(EthnicSense ~ Bilingual + factor(country),
                            data = match.data(match.EthnicSense_binary),
                            weights = weights)
coeftest(m3_binary.EthnicSense, vcov = vcovCL(m3_binary.EthnicSense, cluster = ~subclass))

##### INCLUDE SAMPLING WEIGHTS (Table OA1f.8) ##### 
m1_wts.EthnicSense <- lm(EthnicSense ~ Bilingual + 
                           EthnicDominant + Female + Age + 
                           Education + Income + Internet +
                           Employed + Religion + Urban +
                           factor(country),
                         data = AB4,
                         weights = AB4$w_cross)
coeftest(m1_wts.EthnicSense, vcov = vcovHC(m1_wts.EthnicSense, type = "HC0"))

m2_wts.EthnicSense <- polr(EthnicSense_cat ~ Bilingual + 
                             EthnicDominant + Female + Age + 
                             Education + Income + Internet +
                             Employed + Religion + Urban +
                             factor(country),
                           data = AB4,
                           weights = AB4$w_cross,
                           method = "logistic",
                           Hess = T)
summary(m2_wts.EthnicSense)

##### PLACEBO TESTS (Table OA1g.9) ##### 
###### Democracy Best ###### 
m1.DemBest <- lm(DemBest ~ Bilingual + 
                   EthnicDominant + Female + Age + 
                   Education + Income + Internet +
                   Employed + Religion + Urban +
                   factor(country),
                 data = AB4)
coeftest(m1.DemBest, vcov = vcovHC(m1.DemBest, type = "HC0"))

AB4$DemBest_cat <- factor(AB4$DemBest, levels = c(1:4))
m2.DemBest <- polr(DemBest_cat ~ Bilingual + 
                     EthnicDominant + Female + Age + 
                     Education + Income + Internet +
                     Employed + Religion + Urban +
                     factor(country),
                   data = AB4,
                   method = "logistic",
                   Hess = T)
summary(m2.DemBest)

AB4_DemBest <- AB4 %>%
  dplyr::select(DemBest,Bilingual,EthnicDominant,Female,Age,Education,
                Income,Employed,Religion,Internet,Urban,country) %>% na.omit()

match.DemBest <- matchit(Bilingual ~ 
                           EthnicDominant + Female + Age + 
                           Education + Income + Internet +
                           Employed + Religion + Urban +
                           factor(country), 
                         data = AB4_DemBest,
                         method = "cem",estimand = 'ATE',
                         cutpoints = "sturges")

m3.DemBest <- lm(DemBest ~ Bilingual,
                 data = match.data(match.DemBest),
                 weights = weights)
coeftest(m3.DemBest, vcov = vcovCL(m3.DemBest, cluster = ~subclass))

###### Trustworthy ###### 
m1.Trust <- lm(Trust ~ Bilingual +
                 EthnicDominant + Female + Age + 
                 Education + Income + Internet +
                 Employed + Religion + Urban +
                 factor(country),
               data = AB4)
coeftest(m1.Trust, vcov = vcovHC(m1.Trust, type = "HC0"))

AB4$Trust_cat <- factor(AB4$Trust, levels = c(1:4))
m2.Trust <- polr(Trust_cat ~ Bilingual + 
                   EthnicDominant + Female + Age + 
                   Education + Income + Internet +
                   Employed + Religion + Urban +
                   factor(country),
                 data = AB4,
                 method = "logistic",
                 Hess = T)
summary(m2.Trust)

AB4_Trust <- AB4 %>%
  dplyr::select(Trust,Bilingual,EthnicDominant,Female,Age,Education,
                Income,Employed,Religion,Internet,Urban,country) %>% na.omit()

match.Trust <- matchit(Bilingual ~ 
                         EthnicDominant + Female + Age + 
                         Education + Income + Internet +
                         Employed + Religion + Urban +
                         factor(country), 
                       data = AB4_Trust,
                       method = "cem",estimand = 'ATE',
                       cutpoints = "sturges")

m3.Trust <- lm(Trust ~ Bilingual,
               data = match.data(match.Trust),
               weights = weights)
coeftest(m3.Trust, vcov = vcovCL(m3.Trust, cluster = ~subclass))

###### National Pride ###### 
m1.NatPride <- lm(NatPride ~ Bilingual +
                    EthnicDominant + Female + Age + 
                    Education + Income + Internet +
                    Employed + Religion + Urban +
                    factor(country),
                  data = AB4)
coeftest(m1.NatPride, vcov = vcovHC(m1.NatPride, type = "HC0"))

AB4$NatPride_cat <- factor(AB4$NatPride, levels = c(1:4))
m2.NatPride <- polr(NatPride_cat ~ Bilingual + 
                      EthnicDominant + Female + Age + 
                      Education + Income + Internet +
                      Employed + Religion + Urban +
                      factor(country),
                    data = AB4,
                    method = "logistic",
                    Hess = T)
summary(m2.NatPride)

AB4_NatPride <- AB4 %>%
  dplyr::select(NatPride,Bilingual,EthnicDominant,Female,Age,Education,
                Income,Employed,Religion,Internet,Urban,country) %>% na.omit()

match.NatPride <- matchit(Bilingual ~ 
                            EthnicDominant + Female + Age + 
                            Education + Income + Internet +
                            Employed + Religion + Urban +
                            factor(country), 
                          data = AB4_NatPride,
                          method = "cem",estimand = 'ATE',
                          cutpoints = "sturges")

m3.NatPride <- lm(NatPride ~ Bilingual,
                  data = match.data(match.NatPride),
                  weights = weights)
coeftest(m3.NatPride, vcov = vcovCL(m3.NatPride, cluster = ~subclass))

##### ALTERNATIVE MEASURE OF BILINGUALISM (Table OA1h.10) ##### 
m1_Bilingual_alt.EthnicSense <- lm(EthnicSense ~ Bilingual_alt + 
                                     EthnicDominant + Female + Age + 
                                     Education + Income + Internet +
                                     Employed + Religion + Urban +
                                     factor(country),
                                   data = AB4)
coeftest(m1_Bilingual_alt.EthnicSense, vcov = vcovHC(m1_Bilingual_alt.EthnicSense, type = "HC0"))

m2_Bilingual_alt.EthnicSense <- polr(EthnicSense_cat ~ Bilingual_alt + 
                                       EthnicDominant + Female + Age + 
                                       Education + Income + Internet +
                                       Employed + Religion + Urban +
                                       factor(country),
                                     data = AB4,
                                     method = "logistic",
                                     Hess = T)
summary(m2_Bilingual_alt.EthnicSense)

AB4_Bilingual_alt <- AB4 %>%
  dplyr::select(EthnicSense,Bilingual_alt,EthnicDominant,Female,Age,Education,
                Income,Employed,Religion,Internet,Urban,country) %>% na.omit()

match.EthnicSense_Bilingual_alt <- matchit(Bilingual_alt ~ 
                                             EthnicDominant + Female + Age + 
                                             Education + Income + Internet +
                                             Employed + Religion + Urban +
                                             factor(country), 
                                           data = AB4_Bilingual_alt,
                                           method = "cem",estimand = 'ATE',
                                           cutpoints = "sturges")

m3_Bilingual_alt.EthnicSense <- lm(EthnicSense ~ Bilingual_alt,
                                   data = match.data(match.EthnicSense_Bilingual_alt),
                                   weights = weights)
coeftest(m3_Bilingual_alt.EthnicSense, vcov = vcovCL(m3_Bilingual_alt.EthnicSense, cluster = ~subclass))

##### CONTROL FOR SOCIAL STATUS (Table OA1i.11) ##### 
m1_SocialStatus.EthnicSense <- lm(EthnicSense ~ Bilingual + 
                                    EthnicDominant + Female + Age + 
                                    Education + Income + SocialStatus + Internet +
                                    Employed + Religion + Urban +
                                    factor(country),
                                  data = AB4)
coeftest(m1_SocialStatus.EthnicSense, vcov = vcovHC(m1_SocialStatus.EthnicSense, type = "HC0"))

m2_SocialStatus.EthnicSense <- polr(EthnicSense_cat ~ Bilingual + 
                                      EthnicDominant + Female + Age + 
                                      Education + Income + SocialStatus + Internet +
                                      Employed + Religion + Urban +
                                      factor(country),
                                    data = AB4,
                                    method = "logistic",
                                    Hess = T)
summary(m2_SocialStatus.EthnicSense)

AB4_SocialStatus <- AB4 %>%
  dplyr::select(EthnicSense,Bilingual,EthnicDominant,Female,Age,Education,
                Income,SocialStatus,Employed,Religion,Internet,Urban,country) %>% na.omit()

match.EthnicSense_SocialStatus <- matchit(Bilingual ~ 
                                            EthnicDominant + Female + Age + 
                                            Education + Income + SocialStatus + Internet +
                                            Employed + Religion + Urban +
                                            factor(country), 
                                          data = AB4_SocialStatus,
                                          method = "cem",estimand = 'ATE',
                                          cutpoints = 5) # cutpt == 5 used to prevent too many unmatched obs (approx same N as Table 1)

m3_SocialStatus.EthnicSense <- lm(EthnicSense ~ Bilingual,
                                  data = match.data(match.EthnicSense_SocialStatus),
                                  weights = weights)
coeftest(m3_SocialStatus.EthnicSense, vcov = vcovCL(m3_SocialStatus.EthnicSense, cluster = ~subclass))

##### SENSITIVITY ANALYSIS ##### 
m1.EthnicSense.sensitivity <- sensemakr(model = m1.EthnicSense, 
                                        treatment = "Bilingual",
                                        benchmark_covariates = "EthnicDominant",
                                        alpha = 0.1,
                                        kd = seq(1,5,1))

###### Sensitivity of OLS Model to Unobservables (Table OA1j.12) ###### 
ovb_minimal_reporting(m1.EthnicSense.sensitivity, format = "latex", digits = 4) 

###### Benchmarking Sensitivity of Main Predictor (Table OA1j.13) ###### 
summary(m1.EthnicSense.sensitivity)

##### DISAGGREGATING EFFECTS BY ETHNICITY (Table OA1k.14) ##### 
###### Dominant Ethnic Group = 1 ###### 
m1.EthnicSense_dominant <- lm(EthnicSense ~ Bilingual + 
                                EthnicDominant + Female + Age + 
                                Education + Income + Internet +
                                Employed + Religion + Urban +
                                factor(country),
                              data = filter(AB4, EthnicDominant == 1))
coeftest(m1.EthnicSense_dominant, vcov = vcovHC(m1.EthnicSense_dominant, type = "HC0"))

m2.EthnicSense_dominant <- polr(EthnicSense_cat ~ Bilingual + 
                                  EthnicDominant + Female + Age + 
                                  Education + Income + Internet +
                                  Employed + Religion + Urban +
                                  factor(country),
                                data = filter(AB4, EthnicDominant == 1),
                                method = "logistic",
                                Hess = T)
summary(m2.EthnicSense_dominant)

###### Dominant Ethnic Group = 0 ###### 
m1.EthnicSense_nondominant <- lm(EthnicSense ~ Bilingual + 
                                   EthnicDominant + Female + Age + 
                                   Education + Income + Internet +
                                   Employed + Religion + Urban +
                                   factor(country),
                                 data = filter(AB4, EthnicDominant == 0))
coeftest(m1.EthnicSense_nondominant, vcov = vcovHC(m1.EthnicSense_nondominant, type = "HC0"))

m2.EthnicSense_nondominant <- polr(EthnicSense_cat ~ Bilingual + 
                                     EthnicDominant + Female + Age + 
                                     Education + Income + Internet +
                                     Employed + Religion + Urban +
                                     factor(country),
                                   data = filter(AB4, EthnicDominant == 0),
                                   method = "logistic",
                                   Hess = T)
summary(m2.EthnicSense_nondominant)










